home *** CD-ROM | disk | FTP | other *** search
/ MacTech 1 to 12 / MacTech-vol-1-12.toast / Source / MacTech® Magazine / Volume 07 - 1991 / 07.08 Aug 91 / Browser sources / Search.c < prev    next >
Encoding:
C/C++ Source or Header  |  1991-01-19  |  7.9 KB  |  274 lines  |  [TEXT/KAHL]

  1. /******************************************************/
  2. /*                                                                                                      */
  3. /*    Search.c                                                                                  */
  4. /*                                                                                                      */
  5. /*    Uses the Boyer-Moore search algorithm. Reference: */
  6. /*        "A Fast String Searching Algorithm," Robert S.    */
  7. /*        Boyer and J Strother Moore, CACM v. 20 no. 10      */
  8. /*        (October 1977), pp. 762-772.                                      */
  9. /*    Their original algorithm is modified here to do      */
  10. /*    case-insensitive searches.                                              */
  11. /*                                                                                                      */
  12. /*    Written in Think C version 4.0.2                                  */
  13. /*                                                                                                      */
  14. /*    Allen Stenger    January 1991                                              */
  15. /*                                                                                                      */
  16. /******************************************************/
  17.  
  18. #define DOCOUNT     0    
  19.                                     /* count number of passes in loops     */
  20. #define    DOTIMING    0    /* measure time of searches             */
  21.  
  22. #include <string.h>
  23. #include <ctype.h>
  24. #include "Browser.h"
  25. #include "Search.h"
  26.  
  27. #define LARGE 100000000L
  28.     /* LARGE is picked to be larger than any possible        */
  29.     /* file size.    It is a flag in delta0 indicating         */
  30.     /* that the index character is in the pattern.            */
  31.  
  32. /* These are the Boyer-Moore tables, which tell how     */
  33. /* far the pattern may be shifted for the next trial    */
  34. /* comparison.  delta0 is indexed by unsigned char,        */
  35. /* and delta2 is indexed by position in the pattern.    */
  36. static long            delta0[256];
  37. static short        delta2[256];
  38.  
  39. char                        searchString[256] = {256 *'\0'};
  40.     /* saves the search string */
  41.  
  42. /******************************************************/
  43. /*                                                                                                      */
  44. /*    internal functions                                                              */
  45. /*                                                                                                      */
  46. /******************************************************/
  47.  
  48. /* prototypes for internal functions */
  49.  
  50. /* Calculate the Boyer-Moore delta0 and delta2 tables.*/
  51. /* Tables are calculated assuming upper and lower case*/
  52. /* are the same. The search pattern is the global            */
  53. /* variable searchString, and the tables are stored     */
  54. /* in the global variables delta0 and delta2.                    */
  55.  
  56. static void GetDelta0( void );
  57.  
  58. static void GetDelta2( void );
  59.  
  60. /* Boyer-Moore search method - returns TRUE if string    */
  61. /* found, FALSE otherwise.                                                        */
  62.  
  63. static Boolean                             /* returns whether found    */
  64.     BMSearch( char    *string,    /* target string                     */
  65.                         long    stringlen,/* length of *string            */
  66.                         long *offsetP);    /* output - where found     */
  67.     
  68. /* end prototypes */
  69.  
  70. static void GetDelta0( void )
  71. {
  72.     short            i;                 /* loop control                                 */
  73.     char            *pat = searchString;    /* local copy             */
  74.     long            patlen = strlen(pat); /* local constant     */
  75.     for (i=0; i<256; i++) delta0[i] = patlen; 
  76.     for (i=0; i<patlen; i++) {
  77.         delta0[ (unsigned char) tolower(searchString[i]) ] = 
  78.                     patlen - 1 - i;
  79.         delta0[ (unsigned char) toupper(searchString[i]) ] = 
  80.                     patlen - 1 - i;
  81.         }
  82.     delta0[ (unsigned char) 
  83.         tolower(searchString[patlen-1]) ] = LARGE;
  84.     delta0[ (unsigned char) 
  85.         toupper(searchString[patlen-1]) ] = LARGE;
  86. }
  87.  
  88. static void GetDelta2( void )
  89. {
  90.     #define SameCharAtPos(a, b) ( \
  91.                         (tolower(pat[a]) == tolower(pat[b])) \
  92.                     ||(toupper(pat[a]) == toupper(pat[b]))  )
  93.  
  94.     short        i;                /* index in trial string for rpr     */
  95.     short        j;                /* rpr(j) now being calculated         */
  96.     short        k;                /* trial for rpr                                     */
  97.     short        currentRPR;    /* latest good rpr(j)                     */
  98.     char        *pat = searchString;    /* local copy                 */
  99.     short        patlen = strlen(pat); /* local constant         */
  100.     Boolean    unifies;    /* TRUE if pat[j+1]..pat[patlen-1]*/
  101.                                         /* unifies with                                     */
  102.                                         /* pat[k]..pat[k+patlen-j-2]            */
  103.         
  104.     
  105.     
  106.     for (j=0; j<=patlen-1; j++) { 
  107.         /* Calculate rpr(j). NOTE: our rpr is one less         */
  108.         /* than the Boyer-Moore rpr, because our arrays     */
  109.         /* start at 0 and theirs at 1. The delta2 values    */
  110.         /* are the same, although indexed beginning at 0.    */
  111.         currentRPR = j + 1 - patlen;
  112.         for (k=j+2-patlen; k<=j; k++) { 
  113.             /* check for reoccurrence at pat[k]                         */
  114.             unifies = TRUE;
  115.             
  116.             for (i=0; i<=patlen-2-j; i++) {    
  117.                 if     ( (k+i>=0) && !SameCharAtPos(j+1+i,k+i))
  118.                     unifies = FALSE; 
  119.                 }
  120.             if     ( unifies &&
  121.                         ( (k<1) ||     !SameCharAtPos(k-1,j) ) )
  122.                 currentRPR = k; /* found rightmore k */
  123.             }            
  124.         delta2[j] = patlen - currentRPR;
  125.         }
  126. }
  127.  
  128. static Boolean 
  129.                 BMSearch(    register char    *string,
  130.                                     register long    stringlen, 
  131.                                     long                     *offsetP)
  132. {
  133.     register long        
  134.                     i;    /* index into string of current pointer */
  135.     register short
  136.                     j;    /* index into pat for char-by-char             */
  137.     char        *pat = searchString;    /* local copy                 */
  138.     short        patlen = strlen(pat); /* local constant            */
  139.     register long         
  140.                     d0jump,
  141.                     d2jump; /* sizes of jumps indicated by             */
  142.                                     /* delta0, delta2                                     */
  143.                     
  144.     #if DOCOUNT
  145.         /* number of times through loops                                     */
  146.         long    whileTRUECount = 0;    /* while(TRUE) loop         */
  147.         long    whileICount = 0;    /* while(i) loop                     */
  148.         long    whileJCount = 0;    /* while(j) loop                     */
  149.     #endif
  150.  
  151.     i = patlen - 1;
  152.     if (i >= stringlen) return( FALSE );
  153.     while (TRUE) { 
  154.         #if DOCOUNT
  155.             whileTRUECount++;
  156.         #endif
  157.         
  158.         /* inner loop of Boyer-Moore algorithm                         */
  159.         while( (i += delta0[ (unsigned long) 
  160.                                                  (unsigned char) string[i] ]) 
  161.                         < stringlen ) {
  162.             #if DOCOUNT
  163.                 whileICount++;
  164.             #endif
  165.         };
  166.         
  167.         if (i<LARGE) return( FALSE );
  168.         i -= (LARGE + 1);
  169.         j = patlen - 2;
  170.  
  171.         /* character-by-character comparison                             */
  172.         while ( (j>=0) && 
  173.                         (tolower(string[i]) == tolower(pat[j])) ) {
  174.             #if DOCOUNT 
  175.                 whileJCount++;
  176.             #endif
  177.             --j;
  178.             --i;
  179.             }
  180.         if (j < 0) {
  181.             /* success - whole pattern matched                             */
  182.             *offsetP = i + 1;
  183.             return( TRUE );    
  184.             }
  185.         /* failure - only part of pattern matched - get        */
  186.         /* shifts indicated by delta0 (single-character        */
  187.         /* mismatch) and by delta2 (next plausible                 */
  188.         /* reoccurrence) and take the larger.                            */
  189.         d0jump = delta0[ (unsigned long) 
  190.                                          (unsigned char) string[i] ];
  191.         if (d0jump == LARGE) d0jump = 0;
  192.         d2jump = delta2[j];
  193.         i += (d0jump > d2jump) ? d0jump : d2jump;
  194.         }
  195. }
  196.  
  197. /******************************************************/
  198. /*                                                                                                      */
  199. /*    External functions                                                              */
  200. /*                                                                                                      */
  201. /******************************************************/
  202.  
  203. Boolean GoFind(    Handle    theTextH, 
  204.                                 long         *offsetP)
  205. {
  206.     long        newOffset;        /* where match found                     */
  207.     Boolean    matchFound;        /* whether match found                 */
  208.     #if DOTIMING
  209.         long    startTime, endTime;
  210.         float    elapsedTime;    /* time for GoFind (seconds)    */
  211.     #endif
  212.     
  213.     #if DOTIMING
  214.         startTime = TickCount();
  215.     #endif
  216.     
  217.     /* note - BMSearch always starts from the beginning    */
  218.     /* of the string, so we have to offset the text and    */
  219.     /* add relative offsets                                                         */
  220.     
  221.     matchFound = BMSearch( 
  222.                                     *theTextH + *offsetP, 
  223.                                     GetHandleSize(theTextH) - *offsetP, 
  224.                                     &newOffset );
  225.     if (matchFound) *offsetP += newOffset;
  226.         
  227.     #if DOTIMING
  228.         endTime = TickCount();
  229.         elapsedTime = (endTime - startTime) / 60.0;
  230.     #endif
  231.     
  232.     return (matchFound);
  233. }
  234.  
  235. Boolean GetSearchString( void )
  236. {
  237.     short            itemHit; /* which item in dialog selected */
  238.     DialogPtr    theDialogP; /* pointer to modal dialog         */
  239.     short            itemType;     /* for GetDItem                             */
  240.     Handle        itemHandle; /* for GetDItem                             */
  241.     Rect            box;                /* for GetDItem                             */
  242.     
  243.     theDialogP = GetNewDialog(dlogSearch, NULL, -1L);
  244.     CtoPstr(searchString);
  245.     GetDItem(theDialogP, dlogText, &itemType, 
  246.                             &itemHandle, &box);
  247.     SetIText(itemHandle, searchString);
  248.     SelIText(theDialogP, dlogText, 0, 32767);
  249.     itemHit = dlogText;
  250.     while ( !(    (itemHit == dlogOK) || 
  251.                             (itemHit == dlogCancel)) )
  252.         ModalDialog(NULL, &itemHit);
  253.     if (itemHit == dlogOK) {
  254.         GetDItem(theDialogP, dlogText, &itemType, 
  255.                             &itemHandle, &box);
  256.         GetIText(itemHandle, searchString);
  257.         }
  258.     PtoCstr(searchString);    
  259.         /* convert back to C whether entered or not             */
  260.     DisposDialog(theDialogP);
  261.     
  262.     if (strlen(searchString) != 0) {
  263.         GetDelta0();
  264.         GetDelta2();
  265.         }
  266.     return( (itemHit == dlogOK) && 
  267.                     (strlen(searchString) != 0) );
  268. }
  269.  
  270. Boolean HaveSearchString( void )
  271. {
  272.     return (strlen(searchString) != 0);
  273. }
  274.